package com.cmge.ad.spider.pic.meinv;

import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

import com.cmge.ad.util.HttpClientUtil;
import com.cmge.ad.util.JsonUtil;
import com.cmge.ad.util.SuprUtil;

/**
 * Crawls wallpaper images from duitang.com and saves them to the local disk.
 *
 * <p>Flow, as implemented below:
 * <ol>
 *   <li>{@link #main(String[])} pages through the duitang category listing API and
 *       collects blog ids into {@code resultList}.</li>
 *   <li>A WebMagic {@link Spider} is started on one seed detail page. While processing
 *       the first page, {@link #process(Page)} starts the background download thread and
 *       enqueues one detail-page request per collected blog id.</li>
 *   <li>Every processed detail page contributes the href of its
 *       {@code <a class="vieworg">} link to {@code crawlPicList}; the background thread
 *       downloads those URLs one by one.</li>
 * </ol>
 *
 * <p>Legacy notes kept from the original header. They describe a Baidu image crawl and
 * appear to be carried over from an earlier crawler this class was derived from:
 * <pre>
 * Channel page:  http://image.baidu.com/channel?c=%E7%BE%8E%E5%A5%B3
 * Listing API:   http://image.baidu.com/data/imgs?col=%E7%BE%8E%E5%A5%B3&amp;tag=%E5%85%A8%E9%83%A8&amp;sort=0&amp;tag3=&amp;pn=0&amp;rn=60&amp;p=channel&amp;from=1
 *                (30000 images, 60 per page, 500 pages)
 * Detail page:   http://image.baidu.com/search/detail?word=%E4%BF%A1%E6%81%AF%E5%9B%BE%E8%A1%A8&amp;tn=baiduimagedetail&amp;os=182542327%2C200638614
 *                (the "os" value comes from the "os" field returned by the API)
 * Search API:    http://image.baidu.com/search/acjson?tn=resultjson_com&amp;ipn=rj&amp;word=%E4%BF%A1%E6%81%AF%E5%9B%BE%E8%A1%A8&amp;pn=120&amp;rn=60
 * </pre>
 *
 * @author ljt (original version dated 2014-12-30 19:51:34)
 */
public class DuiTangImagChannelCrawl implements PageProcessor {

    /** Root directory under which a per-keyword download folder is created. */
    private static final String DOWNLOAD_ROOT = "E:/baiduPic/";

    /** Size of the copy buffer used when streaming an image to disk. */
    private static final int BUFFER_SIZE = 1024;

    private final Site site = Site.me().setRetryTimes(3).setSleepTime(100);

    /** Next listing page to request; appended to {@link #url}. */
    private static int pageNum = 0;

    /** Upper bound (inclusive) on the listing page number requested by {@link #main(String[])}. */
    private static int maxPageNumber = 1000;

    /** Keyword used as the name of the download sub-directory. */
    private static String keyWord;

    /** Baidu detail-page URL prefix; not referenced anywhere else in this class. */
    private static String detailUrl;

    /** Listing URL prefix; the page number is appended to it. */
    private static String url;

    /** Blog ids collected by {@link #main(String[])} before the spider starts. */
    private static List<String> resultList = new ArrayList<String>();

    /** True until the first page has been processed by this instance. */
    private boolean flag = true;

    /**
     * Image URLs waiting to be downloaded. Appended to by the spider thread and read by
     * the download thread, hence the synchronized wrapper. Never contains {@code null}.
     */
    private static List<String> crawlPicList = Collections.synchronizedList(new ArrayList<String>());

    static {
        try {
            keyWord = "壁纸";
            url = "http://www.duitang.com/category/wallpaper/?srcname=%E5%A3%81%E7%BA%B8_%E9%A3%8E%E6%99%AF&_type=&page=";
            detailUrl = "http://image.baidu.com/search/detail?word=" + URLEncoder.encode(keyWord, "utf-8") + "&tn=baiduimagedetail&os=";
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts the original-image link from a detail page and queues it for download.
     * On the first call it also starts the download thread and schedules a detail-page
     * request for every blog id in {@code resultList}.
     *
     * @param page the fetched detail page
     */
    @Override
    public void process(Page page) {
        String picUrl = null;
        try {
            picUrl = page.getHtml().xpath("//a[@class='vieworg']/@href").get();
        } catch (Exception e) {
            e.printStackTrace();
        }

        if (picUrl != null) {
            System.out.println(picUrl);
            crawlPicList.add(picUrl);
        } else {
            // Do not enqueue null: a null entry used to block the downloader forever.
            System.err.println("No original-image link found on " + page.getUrl());
        }

        if (flag) {
            flag = false;
            startThread();
            for (String id : resultList) {
                page.addTargetRequest("http://www.duitang.com/people/mblog/" + id + "/detail/");
            }
        }
    }

    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Collects blog ids from the listing API, then starts the spider on a seed page.
     *
     * @param args unused
     * @throws Exception if fetching or parsing a listing page fails
     */
    public static void main(String[] args) throws Exception {

        // Page through the listing API until a page has no blogs or the limit is hit.
        while (pageNum <= maxPageNumber) {
            BaiduImageResult temp = (BaiduImageResult) JsonUtil.getGson()
                    .fromJson(HttpClientUtil.getJson(url + pageNum), BaiduImageResult.class);

            List<Blog> blogs = null;
            if (temp != null && temp.getData() != null) {
                blogs = temp.getData().getBlogs();
            }
            if (blogs == null || SuprUtil.isEmptyCollection(blogs)) {
                break;
            }

            for (Blog blog : blogs) {
                // Skip malformed entries instead of building ".../mblog/null/detail/" URLs.
                if (blog != null && blog.getId() != null) {
                    resultList.add(blog.getId());
                }
            }
            pageNum++;
        }

        System.out.println(resultList.size());

        Spider qsSpider = Spider.create(new DuiTangImagChannelCrawl())
                .addUrl("http://www.duitang.com/people/mblog/449800956/detail/")
                .thread(1);
        qsSpider.start();
    }

    /**
     * Starts the background thread that downloads queued image URLs in order.
     * The thread polls {@code crawlPicList} once per second when it has caught up and
     * runs until it is interrupted.
     */
    private static void startThread() {

        Thread t = new Thread(new Runnable() {

            /** Index of the next entry of crawlPicList to download. */
            private int next = 0;

            @Override
            public void run() {
                while (true) {
                    if (next < crawlPicList.size()) {
                        String picUrl = crawlPicList.get(next);
                        next++;
                        // Defensive: always advance, even past an unexpected null entry.
                        if (picUrl != null) {
                            uploadPic(picUrl);
                        }
                    } else {
                        try {
                            Thread.sleep(1000);
                        } catch (InterruptedException e) {
                            // Restore the interrupt status and stop the worker.
                            Thread.currentThread().interrupt();
                            return;
                        }
                    }
                }
            }
        });

        t.start();
    }

    /**
     * Downloads one image to {@code E:/baiduPic/<keyWord>/<FILE NAME>}. The file name is
     * the upper-cased part of the URL after its last '/'. I/O failures are printed and
     * the image is skipped; both streams are always closed.
     *
     * @param picUrl absolute URL of the image to download
     */
    private static void uploadPic(String picUrl) {
        System.out.println("正在下载：" + picUrl);

        InputStream inStream = null;
        FileOutputStream fs = null;
        try {
            URLConnection conn = new URL(picUrl).openConnection();
            conn.setReadTimeout(1000);
            conn.setConnectTimeout(1000);
            inStream = conn.getInputStream();

            String fileName = picUrl.substring(picUrl.lastIndexOf("/") + 1).toUpperCase();
            File dir = new File(DOWNLOAD_ROOT + keyWord);
            // mkdirs (not mkdir) so that a missing download root is created as well.
            if (!dir.isDirectory() && !dir.mkdirs()) {
                throw new IOException("Cannot create directory " + dir);
            }
            fs = new FileOutputStream(new File(dir, fileName));

            byte[] buffer = new byte[BUFFER_SIZE];
            int byteread;
            while ((byteread = inStream.read(buffer)) != -1) {
                fs.write(buffer, 0, byteread);
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException and MalformedURLException.
            e.printStackTrace();
        } finally {
            closeQuietly(fs);
            closeQuietly(inStream);
        }
    }

    /** Closes {@code c} if it is non-null, printing (not propagating) any failure. */
    private static void closeQuietly(Closeable c) {
        if (c == null) {
            return;
        }
        try {
            c.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Top-level object the listing API response is deserialized into. */
    static class BaiduImageResult {
        private BaiduImage data;

        public BaiduImage getData() {
            return data;
        }

        public void setData(BaiduImage data) {
            this.data = data;
        }
    }

    /** One listing entry; only its id is used, to build the detail-page URL. */
    static class Blog {
        private String id;

        public String getId() {
            return id;
        }

        public void setId(String id) {
            this.id = id;
        }
    }

    /** Payload of a listing response: the blogs on one page. */
    static class BaiduImage {
        private ArrayList<Blog> blogs;

        public ArrayList<Blog> getBlogs() {
            return blogs;
        }

        public void setBlogs(ArrayList<Blog> blogs) {
            this.blogs = blogs;
        }
    }
}
